### Replication code for Taylor C. Boas, F. Daniel Hidalgo, and Guillermo Toral. "Competence versus Priorities: Negative Electoral Responses to Education Quality in Brazil"
### This file builds the dataset "rdd_dataset.csv", with which all the regression discontinuity analyses included in the main text and in the Appendices (and replicable with the code in rdd_analyses.R) are done
### R version, platform, and package versions reported at the end of the file
### June 19, 2020

# PREPARE THE ENVIRONMENT -------------------------------------------------
# Before running, set the working directory to the folder that contains this file.
# That folder must also contain a "figures" and a "tables" subdirectory
# (the script additionally reads its inputs from a "data" subdirectory).

# Start from an empty workspace
rm(list = ls())
# Attach the required packages (they must already be installed)
library(tidyverse)
library(electionsBR)
library(readxl)
library(codebook)

# DOWNLOAD OR LOAD ELECTORAL DATA -------------------------------
# Data on candidates and their performance in local elections from 2000 to 2016 come from the site of Brazil's Supreme Electoral Court (TSE): http://www.tse.jus.br/eleicoes/estatisticas/repositorio-de-dados-eleitorais-1/repositorio-de-dados-eleitorais
# The data can easily be downloaded and organized using the electionsBR package.
# To download the data yourself, uncomment the following lines; otherwise load the saved workspace with the load() call below.
# Note that candidate files (d*) cover 2000-2016, while vote files (v*) are downloaded for 2004-2016 only.

# d16 <- candidate_local(year=2016)
# d12 <- candidate_local(year=2012)
# d08 <- candidate_local(year=2008)
# d04 <- candidate_local(year=2004)
# d00 <- candidate_local(year=2000)
# 
# v16 <- vote_mun_zone_local(year=2016)
# v12 <- vote_mun_zone_local(year=2012)
# v08 <- vote_mun_zone_local(year=2008)
# v04 <- vote_mun_zone_local(year=2004)
# 
# save.image(file = "data/electionsBR_2000-2016.RData")

# Load the saved workspace. Objects starting with d hold data on candidates and objects starting with v hold
# candidates' electoral performance; the two-digit suffix is the election year (2000 to 2016).
load("data/electionsBR_2000-2016.RData")

# EXTRACT DATA ON CANDIDATES ----------------------------------------------

# Candidacy statuses to exclude: candidates whose candidacy was not validated or was
# cancelled by the electoral justice, or who died
invalid <- c(
  "INDEFERIDO",
  "INDEFERIDO POR IMPUGNAÇÃO",
  "FALECIDO",
  "CASSADO",
  "CASSAÇÃO DO REGISTRO"
)

# Mayoral candidates running for the 2016 - 2020 term
candidates_1620 <- d16 %>%
  # Keep every candidate for mayor whose candidacy was not invalid (winners and losers alike)
  dplyr::filter(
    DESCRICAO_CARGO == "PREFEITO",
    !(DES_SITUACAO_CANDIDATURA %in% invalid)
  ) %>%
  # Keep and rename the variables of interest
  dplyr::select(
    code_municipality_tse = SIGLA_UE, # TSE's municipality code
    code_candidate = SEQUENCIAL_CANDIDATO, # TSE's candidate code
    cpf_candidate = CPF_CANDIDATO, # Candidate's CPF (a government unique ID for individuals, similar to the social security number in the US)
    code_party = NUMERO_PARTIDO, # TSE's party code
    name_party = SIGLA_PARTIDO, # Party name
    round = NUM_TURNO, # Round of elections
    candidate_situation = DES_SITUACAO_CANDIDATURA, # Situation of the candidacy
    candidate_result = DESC_SIT_TOT_TURNO, # Electoral result of the candidate
    election_description = DESCRICAO_ELEICAO # Description of the election
  )

# Candidates who were elected mayor
mayors_1620 <- dplyr::filter(candidates_1620, candidate_result == "ELEITO")

# Municipalities where more than one elected mayor is observed
repeated_muns <- dplyr::filter(mayors_1620, duplicated(code_municipality_tse))
repeated_elections <- dplyr::filter(
  mayors_1620,
  code_municipality_tse %in% repeated_muns$code_municipality_tse
)

# Among those, winners of elections other than the regular 2016 one are treated as supplementary elections
supplementary_elections_1620 <- repeated_elections %>%
  dplyr::filter(election_description != "Eleições Municipais 2016") %>%
  dplyr::mutate(supplementary_election = 1)

# Drop all candidates from municipalities with repeated elections, flag the rest as regular,
# and add back the winners of the supplementary elections
candidates_1620 <- candidates_1620 %>%
  dplyr::filter(!(code_municipality_tse %in% repeated_elections$code_municipality_tse)) %>%
  dplyr::mutate(supplementary_election = 0) %>%
  rbind(supplementary_elections_1620)

# Rebuild the mayors file from the cleaned candidates file
mayors_1620 <- dplyr::filter(candidates_1620, candidate_result == "ELEITO")
# Sanity check: should print TRUE, i.e. one elected mayor per municipality (the original annotation here reads 5550)
nrow(mayors_1620) == n_distinct(mayors_1620$code_municipality_tse)

# Mayoral candidates running for the 2012 - 2016 term
candidates_1216 <- d12 %>%
  # Keep every candidate for mayor whose candidacy was not invalid (winners and losers alike)
  dplyr::filter(
    DESCRICAO_CARGO == "PREFEITO",
    !(DES_SITUACAO_CANDIDATURA %in% invalid)
  ) %>%
  # Keep and rename the variables of interest
  dplyr::select(
    code_municipality_tse = SIGLA_UE, # TSE's municipality code
    code_candidate = SEQUENCIAL_CANDIDATO, # TSE's candidate code
    cpf_candidate = CPF_CANDIDATO, # Candidate's CPF (a government unique ID for individuals, similar to the social security number in the US)
    code_party = NUMERO_PARTIDO, # TSE's party code
    name_party = SIGLA_PARTIDO, # Party name
    round = NUM_TURNO, # Round of elections
    candidate_situation = DES_SITUACAO_CANDIDATURA, # Situation of the candidacy
    candidate_result = DESC_SIT_TOT_TURNO, # Electoral result of the candidate
    election_description = DESCRICAO_ELEICAO # Description of the election
  )

# Candidates who were elected mayor
mayors_1216 <- dplyr::filter(candidates_1216, candidate_result == "ELEITO")

# Municipalities where more than one elected mayor is observed
repeated_muns <- dplyr::filter(mayors_1216, duplicated(code_municipality_tse))
repeated_elections <- dplyr::filter(
  mayors_1216,
  code_municipality_tse %in% repeated_muns$code_municipality_tse
)

# Among those, winners of elections other than the regular 2012 one are treated as supplementary elections
supplementary_elections_1216 <- repeated_elections %>%
  dplyr::filter(election_description != "ELEIÇÃO MUNICIPAL 2012") %>%
  dplyr::mutate(supplementary_election = 1)

# Drop all candidates from municipalities with repeated elections, flag the rest as regular,
# add back the winners of the supplementary elections, and finally remove the one municipality
# for which we observe two supplementary elections (we do not know which one is valid)
candidates_1216 <- candidates_1216 %>%
  dplyr::filter(!(code_municipality_tse %in% repeated_elections$code_municipality_tse)) %>%
  dplyr::mutate(supplementary_election = 0) %>%
  rbind(supplementary_elections_1216) %>%
  dplyr::filter(code_municipality_tse != "05312")

# Rebuild the mayors file from the cleaned candidates file
mayors_1216 <- dplyr::filter(candidates_1216, candidate_result == "ELEITO")
# Sanity check: should print TRUE, i.e. one elected mayor per municipality (the original annotation here reads 5567)
nrow(mayors_1216) == n_distinct(mayors_1216$code_municipality_tse)

# Mayoral candidates running for the 2008 - 2012 term
candidates_0812 <- d08 %>%
  # Keep every candidate for mayor whose candidacy was not invalid (winners and losers alike)
  dplyr::filter(
    DESCRICAO_CARGO == "PREFEITO",
    !(DES_SITUACAO_CANDIDATURA %in% invalid)
  ) %>%
  # Keep and rename the variables of interest
  dplyr::select(
    code_municipality_tse = SIGLA_UE, # TSE's municipality code
    code_candidate = SEQUENCIAL_CANDIDATO, # TSE's candidate code
    cpf_candidate = CPF_CANDIDATO, # Candidate's CPF (a government unique ID for individuals, similar to the social security number in the US)
    code_party = NUMERO_PARTIDO, # TSE's party code
    name_party = SIGLA_PARTIDO, # Party name
    round = NUM_TURNO, # Round of elections
    candidate_situation = DES_SITUACAO_CANDIDATURA, # Situation of the candidacy
    candidate_result = DESC_SIT_TOT_TURNO, # Electoral result of the candidate
    election_description = DESCRICAO_ELEICAO # Description of the election
  )

# Candidates who were elected mayor
mayors_0812 <- dplyr::filter(candidates_0812, candidate_result == "ELEITO")

# Municipalities where more than one elected mayor is observed
repeated_muns <- dplyr::filter(mayors_0812, duplicated(code_municipality_tse))
repeated_elections <- dplyr::filter(
  mayors_0812,
  code_municipality_tse %in% repeated_muns$code_municipality_tse
)

# Among those, winners of elections explicitly described as supplementary
# (unlike 2012 and 2016, the match here is on the supplementary label itself)
supplementary_elections_0812 <- repeated_elections %>%
  dplyr::filter(election_description == "ELEIÇÕES SUPLEMENTARES 2008") %>%
  dplyr::mutate(supplementary_election = 1)

# Drop all candidates from municipalities with repeated elections, flag the rest as regular,
# add back the winners of the supplementary elections, and finally remove the two municipalities
# for which we observe two supplementary elections (we do not know which one is valid)
candidates_0812 <- candidates_0812 %>%
  dplyr::filter(!(code_municipality_tse %in% repeated_elections$code_municipality_tse)) %>%
  dplyr::mutate(supplementary_election = 0) %>%
  rbind(supplementary_elections_0812) %>%
  dplyr::filter(
    code_municipality_tse != "09156",
    code_municipality_tse != "43494"
  )

# Rebuild the mayors file from the cleaned candidates file
mayors_0812 <- dplyr::filter(candidates_0812, candidate_result == "ELEITO")
# Sanity check: should print TRUE, i.e. one elected mayor per municipality (the original annotation here reads 5553)
nrow(mayors_0812) == n_distinct(mayors_0812$code_municipality_tse)

# Mayoral candidates running for the 2004 - 2008 term
candidates_0408 <- d04 %>%
  # Keep every candidate for mayor whose candidacy was not invalid (winners and losers alike).
  # A further condition, DESC_SIT_TOT_TURNO != "#NULO#" (performance not considered null by the TSE),
  # is commented out in the original script and therefore NOT applied.
  dplyr::filter(
    DESCRICAO_CARGO == "PREFEITO",
    !(DES_SITUACAO_CANDIDATURA %in% invalid)
  ) %>%
  # Keep and rename the variables of interest
  dplyr::select(
    code_municipality_tse = SIGLA_UE, # TSE's municipality code
    code_candidate = SEQUENCIAL_CANDIDATO, # TSE's candidate code
    cpf_candidate = CPF_CANDIDATO, # Candidate's CPF (a government unique ID for individuals, similar to the social security number in the US)
    code_party = NUMERO_PARTIDO, # TSE's party code
    name_party = SIGLA_PARTIDO, # Party name
    round = NUM_TURNO, # Round of elections
    candidate_situation = DES_SITUACAO_CANDIDATURA, # Situation of the candidacy
    candidate_result = DESC_SIT_TOT_TURNO, # Electoral result of the candidate
    election_description = DESCRICAO_ELEICAO # Description of the election
  ) %>%
  # No supplementary elections reported in 2004
  dplyr::mutate(supplementary_election = 0)

# Candidates who were elected mayor
mayors_0408 <- dplyr::filter(candidates_0408, candidate_result == "ELEITO")
# Number of municipalities with a second elected mayor; the authors report none, consistent with no supplementary elections
sum(duplicated(mayors_0408$code_municipality_tse))

# Clean CPF codes: drop the mayor without a CPF (1 case per the original note) ...
mayors_0408 <- dplyr::filter(mayors_0408, cpf_candidate != "#NULO#")
# ... and restore leading zeroes so that every CPF has 11 characters
mayors_0408$cpf_candidate <- str_pad(mayors_0408$cpf_candidate, 11, side = "left", pad = "0")
nrow(mayors_0408) # 5520 municipalities

# Mayors elected for the 2000 - 2004 term
mayors_0004 <- d00 %>%
  # Keep only elected candidates for mayor whose candidacy was not invalid
  dplyr::filter(
    DESCRICAO_CARGO == "PREFEITO", # Candidates for mayoral office
    DESC_SIT_TOT_TURNO == "ELEITO", # Who were elected
    !(DES_SITUACAO_CANDIDATURA %in% invalid) # And whose candidacy was not invalid
  ) %>%
  # Keep and rename the variables of interest
  dplyr::select(
    code_municipality_tse = SIGLA_UE, # TSE's municipality code
    code_candidate = SEQUENCIAL_CANDIDATO, # TSE's candidate code
    cpf_candidate = CPF_CANDIDATO, # Candidate's CPF (a government unique ID for individuals, similar to the social security number in the US)
    code_party = NUMERO_PARTIDO, # TSE's party code
    name_party = SIGLA_PARTIDO, # Party name
    round = NUM_TURNO, # Round of elections
    candidate_situation = DES_SITUACAO_CANDIDATURA, # Situation of the candidacy
    election_description = DESCRICAO_ELEICAO # Description of the election
  ) %>%
  # In 2000 there are no supplementary elections in the data (although some municipalities
  # have more than one elected mayor, handled below)
  dplyr::mutate(supplementary_election = 0)
nrow(mayors_0004) # 5247 municipalities

# Municipalities with more than one elected mayor: we do not know which of these repeated
# elections was the valid one, so every record for those municipalities is removed
duplicated_muns <- dplyr::filter(mayors_0004, duplicated(code_municipality_tse))
nrow(duplicated_muns) # 17 are repeated

mayors_0004 <- mayors_0004 %>%
  dplyr::filter(!(code_municipality_tse %in% duplicated_muns$code_municipality_tse)) %>%
  # Clean CPF codes: remove the 18 cases where the mayor has no CPF ...
  dplyr::filter(cpf_candidate != "#NULO#") %>%
  # ... and restore leading zeroes so that every CPF has 11 characters
  dplyr::mutate(cpf_candidate = str_pad(cpf_candidate, 11, side = "left", pad = "0"))
nrow(mayors_0004) # 5195 municipalities

# EXTRACT DATA ON VOTES ---------------------------------------------------
# Votes in the 2016 election
votes_1620 <- v16 %>%
  # Keep vote records of candidates for mayor who are marked "APTO" and whose candidacy status is not invalid
  dplyr::filter(
    DESCRICAO_CARGO == "PREFEITO",
    DESC_SIT_CAND_SUPERIOR == "APTO",
    !(DESC_SIT_CANDIDATO %in% invalid)
  ) %>%
  # Keep and rename the variables of interest
  dplyr::select(
    code_municipality_tse = SIGLA_UE, # TSE's municipality code
    code_candidate = SQ_CANDIDATO, # TSE's candidate code
    code_party = NUMERO_PARTIDO, # TSE's party code
    round = NUM_TURNO, # Round of elections
    votes = TOTAL_VOTOS, # Number of votes
    candidate_result = DESC_SIT_CAND_TOT, # Electoral result of the candidate
    election_description = DESCRICAO_ELEICAO # Description of the election
  ) %>%
  dplyr::mutate(votes = as.numeric(votes))

# Vote records from municipalities without a supplementary election identified in the candidate data
votes_1620_regular <- dplyr::filter(
  votes_1620,
  !(code_municipality_tse %in% supplementary_elections_1620$code_municipality_tse)
)
# Vote records from any election other than the regular 2016 one
# NOTE(review): a non-regular record from a municipality that is not listed in
# supplementary_elections_1620 would satisfy both filters and be stacked twice -- confirm none exist
votes_1620_supplementary <- dplyr::filter(
  votes_1620,
  election_description != "ELEIÇÕES MUNICIPAIS 2016"
)
votes_1620 <- rbind(votes_1620_regular, votes_1620_supplementary)

# Votes in the 2012 election
votes_1216 <- v12 %>%
  # Keep vote records of candidates for mayor who are marked "APTO" and whose candidacy status is not invalid
  dplyr::filter(
    DESCRICAO_CARGO == "PREFEITO",
    DESC_SIT_CAND_SUPERIOR == "APTO",
    !(DESC_SIT_CANDIDATO %in% invalid)
  ) %>%
  # Keep and rename the variables of interest
  dplyr::select(
    code_municipality_tse = SIGLA_UE, # TSE's municipality code
    code_candidate = SQ_CANDIDATO, # TSE's candidate code
    code_party = NUMERO_PARTIDO, # TSE's party code
    round = NUM_TURNO, # Round of elections
    votes = TOTAL_VOTOS, # Number of votes
    candidate_result = DESC_SIT_CAND_TOT, # Electoral result of the candidate
    election_description = DESCRICAO_ELEICAO # Description of the election
  ) %>%
  dplyr::mutate(votes = as.numeric(votes))

# Vote records from municipalities without a supplementary election identified in the candidate data
votes_1216_regular <- dplyr::filter(
  votes_1216,
  !(code_municipality_tse %in% supplementary_elections_1216$code_municipality_tse)
)
# Vote records from any election other than the regular 2012 one
# NOTE(review): a non-regular record from a municipality that is not listed in
# supplementary_elections_1216 would satisfy both filters and be stacked twice -- confirm none exist
votes_1216_supplementary <- dplyr::filter(
  votes_1216,
  election_description != "ELEIÇÃO MUNICIPAL 2012"
)
votes_1216 <- rbind(votes_1216_regular, votes_1216_supplementary)

# Votes in the 2008 election
votes_0812 <- v08 %>%
  # Keep vote records of candidates for mayor who are marked "APTO" and whose candidacy status is not invalid
  dplyr::filter(
    DESCRICAO_CARGO == "PREFEITO",
    DESC_SIT_CAND_SUPERIOR == "APTO",
    !(DESC_SIT_CANDIDATO %in% invalid)
  ) %>%
  # Keep and rename the variables of interest
  dplyr::select(
    code_municipality_tse = SIGLA_UE, # TSE's municipality code
    code_candidate = SQ_CANDIDATO, # TSE's candidate code
    code_party = NUMERO_PARTIDO, # TSE's party code
    round = NUM_TURNO, # Round of elections
    votes = TOTAL_VOTOS, # Number of votes
    candidate_result = DESC_SIT_CAND_TOT, # Electoral result of the candidate
    election_description = DESCRICAO_ELEICAO # Description of the election
  ) %>%
  dplyr::mutate(votes = as.numeric(votes))

# Vote records from municipalities without a supplementary election identified in the candidate data
votes_0812_regular <- dplyr::filter(
  votes_0812,
  !(code_municipality_tse %in% supplementary_elections_0812$code_municipality_tse)
)
# Vote records from any election other than the regular 2008 one
# NOTE(review): a non-regular record from a municipality that is not listed in
# supplementary_elections_0812 would satisfy both filters and be stacked twice -- confirm none exist
votes_0812_supplementary <- dplyr::filter(
  votes_0812,
  election_description != "ELEIÇÕES 2008"
)
votes_0812 <- rbind(votes_0812_regular, votes_0812_supplementary)

# Votes in the 2004 election (no supplementary-election handling is applied for this year)
votes_0408 <- v04 %>%
  # Keep vote records of candidates for mayor who are marked "APTO" and whose candidacy status is not invalid
  dplyr::filter(
    DESCRICAO_CARGO == "PREFEITO",
    DESC_SIT_CAND_SUPERIOR == "APTO",
    !(DESC_SIT_CANDIDATO %in% invalid)
  ) %>%
  # Keep and rename the variables of interest
  dplyr::select(
    code_municipality_tse = SIGLA_UE, # TSE's municipality code
    code_candidate = SQ_CANDIDATO, # TSE's candidate code
    code_party = NUMERO_PARTIDO, # TSE's party code
    round = NUM_TURNO, # Round of elections
    votes = TOTAL_VOTOS, # Number of votes
    candidate_result = DESC_SIT_CAND_TOT, # Electoral result of the candidate
    election_description = DESCRICAO_ELEICAO # Description of the election
  ) %>%
  dplyr::mutate(votes = as.numeric(votes))

# MERGE WITH IDENTIFIERS FOR ALL MUNICIPALITIES ---------------------

# Load identifiers of all 5570 municipalities (5 were created in 2013) and build the
# TSE code used as merge key, adding leading zeroes (TSE codes are all 5-digit)
m <- read_csv("data/municipality_identifiers.csv") %>%
  dplyr::mutate(code_municipality_tse = str_pad(as.character(cod_tse), 5, side = "left", pad = "0"))

# Attach the elected mayor to every municipality, keeping one dataset per election cycle.
# Municipalities without a retained mayor get NA in the mayor columns.
# NOTE(review): no `by` is given, so the join uses every column name shared by both tables --
# presumably only code_municipality_tse; confirm against the columns of the identifiers file
m16 <- m %>% left_join(mayors_1620)
m12 <- m %>% left_join(mayors_1216)
m08 <- m %>% left_join(mayors_0812)
m04 <- m %>% left_join(mayors_0408)
m00 <- m %>% left_join(mayors_0004)

# GENERATE INDICATOR FOR WHETHER MAYOR IS ALLOWED TO RUN, IS RE-ELECTED ----------------------
# incumbent_mayor_reelected: 1 if the CPF of the elected mayor coincides with the CPF of the
#   mayor elected in the previous term, 0 otherwise
# incumbent_mayor_cannot_run: 1 if the CPF of the mayor elected in the previous two terms
#   coincides, 0 otherwise (the indicator's name suggests such an incumbent is barred from running again)
# Both indicators are NA whenever one of the CPFs being compared is missing.

# CPF of the mayor elected in `previous`, aligned to the rows of `current` by TSE municipality code.
# A vectorised match() lookup replaces the original row-by-row loops, which scanned the whole
# previous-cycle table once per municipality.
lookup_previous_cpf <- function(current, previous) {
  previous$cpf_candidate[match(current$code_municipality_tse, previous$code_municipality_tse)]
}

# 2016 cycle: previous winners are those of 2012 and 2008
cpf_2012_for_m16 <- lookup_previous_cpf(m16, m12)
cpf_2008_for_m16 <- lookup_previous_cpf(m16, m08)
m16$incumbent_mayor_reelected <- ifelse(m16$cpf_candidate == cpf_2012_for_m16, 1, 0)
m16$incumbent_mayor_cannot_run <- ifelse(cpf_2012_for_m16 == cpf_2008_for_m16, 1, 0)

# 2012 cycle: previous winners are those of 2008 and 2004
cpf_2008_for_m12 <- lookup_previous_cpf(m12, m08)
cpf_2004_for_m12 <- lookup_previous_cpf(m12, m04)
m12$incumbent_mayor_reelected <- ifelse(m12$cpf_candidate == cpf_2008_for_m12, 1, 0)
m12$incumbent_mayor_cannot_run <- ifelse(cpf_2008_for_m12 == cpf_2004_for_m12, 1, 0)

# 2008 cycle: previous winners are those of 2004 and 2000
cpf_2004_for_m08 <- lookup_previous_cpf(m08, m04)
cpf_2000_for_m08 <- lookup_previous_cpf(m08, m00)
m08$incumbent_mayor_reelected <- ifelse(m08$cpf_candidate == cpf_2004_for_m08, 1, 0)
m08$incumbent_mayor_cannot_run <- ifelse(cpf_2004_for_m08 == cpf_2000_for_m08, 1, 0)

# MEASURE ELECTORAL PERFORMANCE OF THE INCUMBENT AND THE WINNER ------------------------------------------------------

# 2016 election
# Outcome columns start as NA and stay NA for municipalities without a valid mayor CPF
m16$incumbent_mayor_ran <- NA
m16$incumbent_party_ran <- NA
m16$total_votes <- NA
m16$electoral_concentration <- NA
m16$incumbent_mayor_voteshare <- NA
m16$winner_mayor_voteshare <- NA
m16$incumbent_mayor_voteshare_round2 <- NA
m16$winner_mayor_voteshare_round2 <- NA

for (i in seq_len(nrow(m16))) { # seq_len() does not iterate when the table is empty, unlike 1:nrow()
  # Only municipalities for which we have a valid mayor CPF are processed
  if (is.na(m16$cpf_candidate[i])) next
  mun_code_here <- m16$code_municipality_tse[i]
  # Subset to vote and candidate data in the municipality
  votes_here <- subset(votes_1620, votes_1620$code_municipality_tse == mun_code_here)
  candidates_here <- subset(candidates_1620, candidates_1620$code_municipality_tse == mun_code_here)
  # Row of this municipality in the previous (2012) cycle; its elected mayor is the incumbent
  previous_row_here <- which(m12$code_municipality_tse == mun_code_here)
  # Get identifiers of the incumbent and the winner
  cpf_incumbent <- m12[previous_row_here, "cpf_candidate"][[1]]
  code_incumbent <- candidates_here[which(candidates_here$cpf_candidate == cpf_incumbent & candidates_here$round == 1), "code_candidate"][[1]]
  code_winner <- candidates_here[which(candidates_here$candidate_result == "ELEITO"), "code_candidate"][[1]]
  # Check if the incumbent (winner of the previous election) ran
  m16$incumbent_mayor_ran[i] <- as.numeric(cpf_incumbent %in% candidates_here$cpf_candidate)
  # Check if the incumbent party (party of the winner of the previous election) ran
  m16$incumbent_party_ran[i] <- as.numeric(m12[previous_row_here, "code_party"][[1]] %in% votes_here$code_party)
  # Create table of voting tallies by candidate, in first round
  votes_here_by_candidate <- subset(votes_here, votes_here$round == 1) %>%
    group_by(code_candidate) %>% # Some candidates have votes reported in multiple lines
    dplyr::summarise(votes = sum(votes))
  # Total number of first-round votes
  total_votes_here <- sum(votes_here_by_candidate$votes)
  # Electoral concentration (Herfindahl index)
  m16$electoral_concentration[i] <- sum((votes_here_by_candidate$votes / total_votes_here)^2)
  m16$total_votes[i] <- total_votes_here
  # First-round vote shares of the incumbent and of the winner (0 when the candidate has no first-round votes)
  m16$incumbent_mayor_voteshare[i] <- sum(votes_here_by_candidate[which(votes_here_by_candidate$code_candidate == code_incumbent), "votes"][[1]] / total_votes_here)
  m16$winner_mayor_voteshare[i] <- sum(votes_here_by_candidate[which(votes_here_by_candidate$code_candidate == code_winner), "votes"][[1]] / total_votes_here)
  if (m16$round[i] == 2) { # If election was won in the second round, record also second-round vote share
    votes_here_by_candidate_round2 <- subset(votes_here, votes_here$round == 2) %>%
      group_by(code_candidate) %>% # Some candidates have votes reported in multiple lines
      dplyr::summarise(votes = sum(votes))
    total_votes_here_round2 <- sum(votes_here_by_candidate_round2$votes)
    m16$incumbent_mayor_voteshare_round2[i] <- sum(votes_here_by_candidate_round2[which(votes_here_by_candidate_round2$code_candidate == code_incumbent), "votes"][[1]] / total_votes_here_round2)
    m16$winner_mayor_voteshare_round2[i] <- sum(votes_here_by_candidate_round2[which(votes_here_by_candidate_round2$code_candidate == code_winner), "votes"][[1]] / total_votes_here_round2)
  }
}

# 2012 election
# Outcome columns start as NA and stay NA for municipalities without a valid mayor CPF
m12$incumbent_mayor_ran <- NA
m12$incumbent_party_ran <- NA
m12$total_votes <- NA
m12$electoral_concentration <- NA
m12$incumbent_mayor_voteshare <- NA
m12$winner_mayor_voteshare <- NA
m12$incumbent_mayor_voteshare_round2 <- NA
m12$winner_mayor_voteshare_round2 <- NA

for (i in seq_len(nrow(m12))) { # seq_len() does not iterate when the table is empty, unlike 1:nrow()
  # Only municipalities for which we have a valid mayor CPF are processed
  if (is.na(m12$cpf_candidate[i])) next
  mun_code_here <- m12$code_municipality_tse[i]
  # Subset to vote and candidate data in the municipality
  votes_here <- subset(votes_1216, votes_1216$code_municipality_tse == mun_code_here)
  candidates_here <- subset(candidates_1216, candidates_1216$code_municipality_tse == mun_code_here)
  # Row of this municipality in the previous (2008) cycle; its elected mayor is the incumbent
  previous_row_here <- which(m08$code_municipality_tse == mun_code_here)
  # Get identifiers of the incumbent and the winner
  cpf_incumbent <- m08[previous_row_here, "cpf_candidate"][[1]]
  code_incumbent <- candidates_here[which(candidates_here$cpf_candidate == cpf_incumbent & candidates_here$round == 1), "code_candidate"][[1]]
  code_winner <- candidates_here[which(candidates_here$candidate_result == "ELEITO"), "code_candidate"][[1]]
  # Check if the incumbent (winner of the previous election) ran
  m12$incumbent_mayor_ran[i] <- as.numeric(cpf_incumbent %in% candidates_here$cpf_candidate)
  # Check if the incumbent party (party of the winner of the previous election) ran
  m12$incumbent_party_ran[i] <- as.numeric(m08[previous_row_here, "code_party"][[1]] %in% votes_here$code_party)
  # Create table of voting tallies by candidate, in first round
  votes_here_by_candidate <- subset(votes_here, votes_here$round == 1) %>%
    group_by(code_candidate) %>% # Some candidates have votes reported in multiple lines
    dplyr::summarise(votes = sum(votes))
  # Total number of first-round votes
  total_votes_here <- sum(votes_here_by_candidate$votes)
  # Electoral concentration (Herfindahl index)
  m12$electoral_concentration[i] <- sum((votes_here_by_candidate$votes / total_votes_here)^2)
  m12$total_votes[i] <- total_votes_here
  # First-round vote shares of the incumbent and of the winner (0 when the candidate has no first-round votes)
  m12$incumbent_mayor_voteshare[i] <- sum(votes_here_by_candidate[which(votes_here_by_candidate$code_candidate == code_incumbent), "votes"][[1]] / total_votes_here)
  m12$winner_mayor_voteshare[i] <- sum(votes_here_by_candidate[which(votes_here_by_candidate$code_candidate == code_winner), "votes"][[1]] / total_votes_here)
  if (m12$round[i] == 2) { # If election was won in the second round, record also second-round vote share
    votes_here_by_candidate_round2 <- subset(votes_here, votes_here$round == 2) %>%
      group_by(code_candidate) %>% # Some candidates have votes reported in multiple lines
      dplyr::summarise(votes = sum(votes))
    total_votes_here_round2 <- sum(votes_here_by_candidate_round2$votes)
    m12$incumbent_mayor_voteshare_round2[i] <- sum(votes_here_by_candidate_round2[which(votes_here_by_candidate_round2$code_candidate == code_incumbent), "votes"][[1]] / total_votes_here_round2)
    m12$winner_mayor_voteshare_round2[i] <- sum(votes_here_by_candidate_round2[which(votes_here_by_candidate_round2$code_candidate == code_winner), "votes"][[1]] / total_votes_here_round2)
  }
}

# 2008 election
# Outcome columns start as NA and stay NA for municipalities without a valid mayor CPF
m08$incumbent_mayor_ran <- NA
m08$incumbent_party_ran <- NA
m08$total_votes <- NA
m08$electoral_concentration <- NA
m08$incumbent_mayor_voteshare <- NA
m08$winner_mayor_voteshare <- NA
m08$incumbent_mayor_voteshare_round2 <- NA
m08$winner_mayor_voteshare_round2 <- NA

for (i in seq_len(nrow(m08))) { # seq_len() does not iterate when the table is empty, unlike 1:nrow()
  # Only municipalities for which we have a valid mayor CPF are processed
  if (is.na(m08$cpf_candidate[i])) next
  mun_code_here <- m08$code_municipality_tse[i]
  # Subset to vote and candidate data in the municipality
  votes_here <- subset(votes_0812, votes_0812$code_municipality_tse == mun_code_here)
  candidates_here <- subset(candidates_0812, candidates_0812$code_municipality_tse == mun_code_here)
  # Row of this municipality in the previous (2004) cycle; its elected mayor is the incumbent
  previous_row_here <- which(m04$code_municipality_tse == mun_code_here)
  # Get identifiers of the incumbent and the winner
  cpf_incumbent <- m04[previous_row_here, "cpf_candidate"][[1]]
  code_incumbent <- candidates_here[which(candidates_here$cpf_candidate == cpf_incumbent & candidates_here$round == 1), "code_candidate"][[1]]
  code_winner <- candidates_here[which(candidates_here$candidate_result == "ELEITO"), "code_candidate"][[1]]
  # Check if the incumbent (winner of the previous election) ran
  m08$incumbent_mayor_ran[i] <- as.numeric(cpf_incumbent %in% candidates_here$cpf_candidate)
  # Check if the incumbent party (party of the winner of the previous election) ran
  m08$incumbent_party_ran[i] <- as.numeric(m04[previous_row_here, "code_party"][[1]] %in% votes_here$code_party)
  # Create table of voting tallies by candidate, in first round
  votes_here_by_candidate <- subset(votes_here, votes_here$round == 1) %>%
    group_by(code_candidate) %>% # Some candidates have votes reported in multiple lines
    dplyr::summarise(votes = sum(votes))
  # Total number of first-round votes
  total_votes_here <- sum(votes_here_by_candidate$votes)
  # Electoral concentration (Herfindahl index)
  m08$electoral_concentration[i] <- sum((votes_here_by_candidate$votes / total_votes_here)^2)
  m08$total_votes[i] <- total_votes_here
  # First-round vote shares of the incumbent and of the winner (0 when the candidate has no first-round votes)
  m08$incumbent_mayor_voteshare[i] <- sum(votes_here_by_candidate[which(votes_here_by_candidate$code_candidate == code_incumbent), "votes"][[1]] / total_votes_here)
  m08$winner_mayor_voteshare[i] <- sum(votes_here_by_candidate[which(votes_here_by_candidate$code_candidate == code_winner), "votes"][[1]] / total_votes_here)
  if (m08$round[i] == 2) { # If election was won in the second round, record also second-round vote share
    votes_here_by_candidate_round2 <- subset(votes_here, votes_here$round == 2) %>%
      group_by(code_candidate) %>% # Some candidates have votes reported in multiple lines
      dplyr::summarise(votes = sum(votes))
    total_votes_here_round2 <- sum(votes_here_by_candidate_round2$votes)
    m08$incumbent_mayor_voteshare_round2[i] <- sum(votes_here_by_candidate_round2[which(votes_here_by_candidate_round2$code_candidate == code_incumbent), "votes"][[1]] / total_votes_here_round2)
    m08$winner_mayor_voteshare_round2[i] <- sum(votes_here_by_candidate_round2[which(votes_here_by_candidate_round2$code_candidate == code_winner), "votes"][[1]] / total_votes_here_round2)
  }
}

# 2004 election
# Outcome columns start as NA and stay NA for municipalities without a valid mayor CPF
m04$incumbent_mayor_ran <- NA
m04$incumbent_party_ran <- NA
m04$total_votes <- NA
m04$electoral_concentration <- NA
m04$incumbent_mayor_voteshare <- NA
m04$winner_mayor_voteshare <- NA
m04$incumbent_mayor_voteshare_round2 <- NA
m04$winner_mayor_voteshare_round2 <- NA

for (i in seq_len(nrow(m04))) { # seq_len() does not iterate when the table is empty, unlike 1:nrow()
  # Only municipalities for which we have a valid mayor CPF are processed
  if (is.na(m04$cpf_candidate[i])) next
  mun_code_here <- m04$code_municipality_tse[i]
  # Subset to vote and candidate data in the municipality
  votes_here <- subset(votes_0408, votes_0408$code_municipality_tse == mun_code_here)
  candidates_here <- subset(candidates_0408, candidates_0408$code_municipality_tse == mun_code_here)
  # Row of this municipality in the previous (2000) cycle; its elected mayor is the incumbent.
  # The previous version looked the incumbent up in m04 itself, which made the "incumbent"
  # identical to the 2004 winner; the incumbent is the winner of the 2000 election (m00).
  previous_row_here <- which(m00$code_municipality_tse == mun_code_here)
  # Get identifiers of the incumbent and the winner
  cpf_incumbent <- m00[previous_row_here, "cpf_candidate"][[1]]
  # CPFs of elected mayors were left-padded to 11 characters when the mayors files were cleaned,
  # so the raw 2004 candidate CPFs are padded the same way before comparing
  cpf_candidates_here <- str_pad(candidates_here$cpf_candidate, 11, side = "left", pad = "0")
  code_incumbent <- candidates_here[which(cpf_candidates_here == cpf_incumbent & candidates_here$round == 1), "code_candidate"][[1]]
  code_winner <- candidates_here[which(candidates_here$candidate_result == "ELEITO"), "code_candidate"][[1]]
  # Check if the incumbent (winner of the previous election) ran
  m04$incumbent_mayor_ran[i] <- as.numeric(cpf_incumbent %in% cpf_candidates_here)
  # Check if the incumbent party (party of the winner of the previous election) ran
  m04$incumbent_party_ran[i] <- as.numeric(m00[previous_row_here, "code_party"][[1]] %in% votes_here$code_party)
  # Create table of voting tallies by candidate, in first round
  votes_here_by_candidate <- subset(votes_here, votes_here$round == 1) %>%
    group_by(code_candidate) %>% # Some candidates have votes reported in multiple lines
    dplyr::summarise(votes = sum(votes))
  # Total number of first-round votes
  total_votes_here <- sum(votes_here_by_candidate$votes)
  # Electoral concentration (Herfindahl index)
  m04$electoral_concentration[i] <- sum((votes_here_by_candidate$votes / total_votes_here)^2)
  m04$total_votes[i] <- total_votes_here
  # First-round vote shares of the incumbent and of the winner (0 when the candidate has no first-round votes)
  m04$incumbent_mayor_voteshare[i] <- sum(votes_here_by_candidate[which(votes_here_by_candidate$code_candidate == code_incumbent), "votes"][[1]] / total_votes_here)
  m04$winner_mayor_voteshare[i] <- sum(votes_here_by_candidate[which(votes_here_by_candidate$code_candidate == code_winner), "votes"][[1]] / total_votes_here)
  if (m04$round[i] == 2) { # If election was won in the second round, record also second-round vote share
    votes_here_by_candidate_round2 <- subset(votes_here, votes_here$round == 2) %>%
      group_by(code_candidate) %>% # Some candidates have votes reported in multiple lines
      dplyr::summarise(votes = sum(votes))
    total_votes_here_round2 <- sum(votes_here_by_candidate_round2$votes)
    m04$incumbent_mayor_voteshare_round2[i] <- sum(votes_here_by_candidate_round2[which(votes_here_by_candidate_round2$code_candidate == code_incumbent), "votes"][[1]] / total_votes_here_round2)
    m04$winner_mayor_voteshare_round2[i] <- sum(votes_here_by_candidate_round2[which(votes_here_by_candidate_round2$code_candidate == code_winner), "votes"][[1]] / total_votes_here_round2)
  }
}

# GENERATE CONTROL VARIABLES -------------------------

# Election-cycle identifiers, one set per cycle table:
# a numeric year label plus 0/1 dummies for the 2016 and 2012 cycles
# (the 2008 table gets 0 on both dummies)

# Year label
m16[["electoral_cycle"]] <- 2016
m12[["electoral_cycle"]] <- 2012
m08[["electoral_cycle"]] <- 2008

# Dummy for the 2016 cycle
m16[["electoral_cycle_2016"]] <- 1
m12[["electoral_cycle_2016"]] <- 0
m08[["electoral_cycle_2016"]] <- 0

# Dummy for the 2012 cycle
m16[["electoral_cycle_2012"]] <- 0
m12[["electoral_cycle_2012"]] <- 1
m08[["electoral_cycle_2012"]] <- 0

# Indicators for whether the incumbent mayor belongs to one of three major parties: PT, PSDB, PMDB
# The incumbent's party is the code_party recorded for the same municipality
# (same code_municipality_tse) in the previous cycle's table: m12 for m16, m08 for m12, m04 for m08.
# Each party has a unique identifier assigned by TSE: http://www.tse.jus.br/partidos/partidos-politicos/registrados-no-tse
# (13 is used for PT, 45 for PSDB, 15 for PMDB)
# The lookup is done with match() on the whole column at once rather than row by row:
# it takes the first matching row of the previous table and gives NA (instead of stopping
# with an error) when a municipality has no row there; incomparables = NA keeps a missing
# municipality code from matching anything.

# 2016 cycle: incumbent party comes from the 2012 table
prev_party_16 <- m12$code_party[match(m16$code_municipality_tse, m12$code_municipality_tse, incomparables = NA)]
m16$incumbent_party_pt <- ifelse(prev_party_16 == 13, 1, 0)
m16$incumbent_party_psdb <- ifelse(prev_party_16 == 45, 1, 0)
m16$incumbent_party_pmdb <- ifelse(prev_party_16 == 15, 1, 0)

# 2012 cycle: incumbent party comes from the 2008 table
prev_party_12 <- m08$code_party[match(m12$code_municipality_tse, m08$code_municipality_tse, incomparables = NA)]
m12$incumbent_party_pt <- ifelse(prev_party_12 == 13, 1, 0)
m12$incumbent_party_psdb <- ifelse(prev_party_12 == 45, 1, 0)
m12$incumbent_party_pmdb <- ifelse(prev_party_12 == 15, 1, 0)

# 2008 cycle: incumbent party comes from the 2004 table
prev_party_08 <- m04$code_party[match(m08$code_municipality_tse, m04$code_municipality_tse, incomparables = NA)]
m08$incumbent_party_pt <- ifelse(prev_party_08 == 13, 1, 0)
m08$incumbent_party_psdb <- ifelse(prev_party_08 == 45, 1, 0)
m08$incumbent_party_pmdb <- ifelse(prev_party_08 == 15, 1, 0)

# Electoral performance of the incumbent in the previous election
# For each municipality, copy winner_mayor_voteshare from the previous cycle's table
# (m12 for m16, m08 for m12, m04 for m08), matching on code_municipality_tse.
# match() does the lookup for the whole column at once: it uses the first matching row and
# leaves NA (instead of stopping with an error) when the municipality has no row in the
# previous table; incomparables = NA keeps a missing municipality code from matching anything.
m16$incumbent_mayor_voteshare_previous <- m12$winner_mayor_voteshare[match(m16$code_municipality_tse, m12$code_municipality_tse, incomparables = NA)]
m12$incumbent_mayor_voteshare_previous <- m08$winner_mayor_voteshare[match(m12$code_municipality_tse, m08$code_municipality_tse, incomparables = NA)]
m08$incumbent_mayor_voteshare_previous <- m04$winner_mayor_voteshare[match(m08$code_municipality_tse, m04$code_municipality_tse, incomparables = NA)]

# MERGE IDEB DATA ----------------------------------------------------
## Primary school IDEB
# Import IDEB performance data at the municipality level
ideb <- data.frame(read_excel('data/divulgacao_anos_iniciais_municipios2017-atualizado-Jun_2019.xlsx',skip=9)) # File downloaded from the site of INEP: http://download.inep.gov.br/educacao_basica/portal_ideb/planilhas_para_download/2017/divulgacao_anos_iniciais_municipios2017-atualizado-Jun_2019.zip
ideb <- ideb[which(ideb$REDE=='Municipal'),grep('^COD_MUN|P14|P4|PAD14|^IDEB14|PROJ14',names(ideb))] # Keep only measures for municipal schools, and variables of interest
# Convert every retained column to numeric, one column at a time. apply() would first turn the
# whole data frame into a single matrix, which formats numeric columns as text whenever any
# column is non-numeric; converting column-wise never sends already-numeric values through a string.
# Entries that are not numbers become NA (as.numeric() warns about the coercion).
ideb <- data.frame(lapply(ideb, function(column) {
  if (is.numeric(column)) as.numeric(column) else as.numeric(as.character(column))
}))

# Stack the 2007, 2009, 2011, 2013 and 2015 waves into one long table (one row per municipality-wave)
ideb_2007_2015 <- with(ideb,data.frame(
  ibge = rep(COD_MUN,5),
  ideb_year = c(rep(2007,nrow(ideb)),rep(2009,nrow(ideb)),rep(2011,nrow(ideb)),rep(2013,nrow(ideb)),rep(2015,nrow(ideb))),
  ideb = c(IDEB14_07,IDEB14_09,IDEB14_11,IDEB14_13,IDEB14_15),
  ideb_cont = c(P14_07*PAD14_07,P14_09*PAD14_09,P14_11*PAD14_11,P14_13*PAD14_13,P14_15*PAD14_15), # Continuous IDEB score, obtained by multiplying student performance by passing rates
  ideb_target = c(PROJ14_07, PROJ14_09, PROJ14_11, PROJ14_13, PROJ14_15),
  ideb_gap = c(P14_07*PAD14_07-PROJ14_07, P14_09*PAD14_09-PROJ14_09, P14_11*PAD14_11-PROJ14_11, P14_13*PAD14_13-PROJ14_13, P14_15*PAD14_15-PROJ14_15), # Continuous IDEB score minus target
  # Same gap shifted up by 0.05; the treatment indicator built later in the script is ideb_gap_centered >= 0
  ideb_gap_centered = c(P14_07*PAD14_07-PROJ14_07+0.05, P14_09*PAD14_09-PROJ14_09+0.05, P14_11*PAD14_11-PROJ14_11+0.05, P14_13*PAD14_13-PROJ14_13+0.05, P14_15*PAD14_15-PROJ14_15+0.05)))

# One table per wave that is merged below (2007, 2011, 2015), without the year column
ideb_2007 <- subset(ideb_2007_2015, ideb_2007_2015$ideb_year==2007) %>%
  dplyr::select(-ideb_year)
ideb_2011 <- subset(ideb_2007_2015, ideb_2007_2015$ideb_year==2011) %>%
  dplyr::select(-ideb_year)
ideb_2015 <- subset(ideb_2007_2015, ideb_2007_2015$ideb_year==2015) %>%
  dplyr::select(-ideb_year)

# Each election cycle gets the IDEB wave of the preceding year; the key is spelled out so the
# join cannot silently pick up additional shared columns
m16 <- left_join(m16, ideb_2015, by = "ibge")
m12 <- left_join(m12, ideb_2011, by = "ibge")
m08 <- left_join(m08, ideb_2007, by = "ibge")

## Middle school IDEB
# Import IDEB performance data at the municipality level
ideb_ef2 <- data.frame(read_excel('data/divulgacao_anos_finais_municipios2017-atualizado-Jun_2019.xlsx',skip=9)) # File downloaded from the site of INEP: http://download.inep.gov.br/educacao_basica/portal_ideb/planilhas_para_download/2017/divulgacao_anos_finais_municipios2017-atualizado-Jun_2019.zip
ideb_ef2 <- ideb_ef2[which(ideb_ef2$REDE=='Municipal'),grep('^COD_MUN|P58|PAD58|^IDEB58|PROJ58',names(ideb_ef2))] # Keep only measures for municipal schools, and variables of interest
# Convert every retained column to numeric, one column at a time. apply() would first turn the
# whole data frame into a single matrix, which formats numeric columns as text whenever any
# column is non-numeric; converting column-wise never sends already-numeric values through a string.
# Entries that are not numbers become NA (as.numeric() warns about the coercion).
ideb_ef2 <- data.frame(lapply(ideb_ef2, function(column) {
  if (is.numeric(column)) as.numeric(column) else as.numeric(as.character(column))
}))

# Stack the 2007, 2009, 2011, 2013 and 2015 waves into one long table (one row per municipality-wave)
ideb_ef2_2007_2015 <- with(ideb_ef2,data.frame(
  ibge = rep(COD_MUN,5),
  ideb_year = c(rep(2007,nrow(ideb_ef2)),rep(2009,nrow(ideb_ef2)),rep(2011,nrow(ideb_ef2)),rep(2013,nrow(ideb_ef2)),rep(2015,nrow(ideb_ef2))),
  ideb_ef2 = c(IDEB58_07,IDEB58_09,IDEB58_11,IDEB58_13,IDEB58_15),
  ideb_cont_ef2 = c(P58_07*PAD58_07,P58_09*PAD58_09,P58_11*PAD58_11,P58_13*PAD58_13,P58_15*PAD58_15), # Continuous IDEB score, obtained by multiplying student performance by passing rates
  ideb_target_ef2 = c(PROJ58_07, PROJ58_09, PROJ58_11, PROJ58_13, PROJ58_15),
  ideb_gap_ef2 = c(P58_07*PAD58_07-PROJ58_07, P58_09*PAD58_09-PROJ58_09, P58_11*PAD58_11-PROJ58_11, P58_13*PAD58_13-PROJ58_13, P58_15*PAD58_15-PROJ58_15))) # Continuous IDEB score minus target

# One table per wave that is merged below (2007, 2011, 2015), without the year column
ideb_ef2_2007 <- subset(ideb_ef2_2007_2015, ideb_ef2_2007_2015$ideb_year==2007) %>%
  dplyr::select(-ideb_year)
ideb_ef2_2011 <- subset(ideb_ef2_2007_2015, ideb_ef2_2007_2015$ideb_year==2011) %>%
  dplyr::select(-ideb_year)
ideb_ef2_2015 <- subset(ideb_ef2_2007_2015, ideb_ef2_2007_2015$ideb_year==2015) %>%
  dplyr::select(-ideb_year)

# Each election cycle gets the IDEB wave of the preceding year; the key is spelled out so the
# join cannot silently pick up additional shared columns
m16 <- left_join(m16, ideb_ef2_2015, by = "ibge")
m12 <- left_join(m12, ideb_ef2_2011, by = "ibge")
m08 <- left_join(m08, ideb_ef2_2007, by = "ibge")

# Flag rows that have no middle-school IDEB gap, i.e. where ideb_gap_ef2 is missing:
# 1 when it is missing (only the primary-school measure is present), 0 when it is observed
m16$ef1_only <- as.numeric(is.na(m16$ideb_gap_ef2))
m12$ef1_only <- as.numeric(is.na(m12$ideb_gap_ef2))
m08$ef1_only <- as.numeric(is.na(m08$ideb_gap_ef2))

# MERGE SOCIOECONOMIC COVARIATES ------------------------------------------

# Population (level and natural log), one file per election year
# Data downloaded from the site of IBGE: https://www.ibge.gov.br/estatisticas/sociais/populacao/9103-estimativas-de-populacao.html
# In each file the 6-digit municipality code is built from the first 6 characters of the
# state code followed by the municipality code.
pop08 <- read_csv("data/population_2008.csv") %>%
  dplyr::mutate(cod_ibge = as.numeric(substr(paste0(cod_uf, cod_municipio),1,6)),
                population = as.numeric(populacao_2008), # as.numeric() for consistency with the 2012 and 2016 files
                population_log = log(population)) %>%
  dplyr::select(cod_ibge, population, population_log)
pop12 <- read_csv("data/population_2012.csv") %>%
  dplyr::mutate(cod_ibge = as.numeric(substr(paste0(cod_uf, cod_municipio),1,6)),
                population = as.numeric(populacao_2012),
                population_log = log(population)) %>%
  dplyr::select(cod_ibge, population, population_log)
pop16 <- read_csv("data/population_2016.csv") %>%
  # The 2016 municipality code is left-padded with zeros to 5 characters before being pasted
  # to the state code (presumably this file stores it without leading zeros -- confirm)
  dplyr::mutate(cod_municipio = str_pad(cod_municipio, 5, side = "left", pad="0"),
                cod_ibge = as.numeric(substr(paste0(cod_uf, cod_municipio),1,6)),
                population = as.numeric(populacao_2016),
                population_log = log(population)) %>%
  dplyr::select(cod_ibge, population, population_log)
# Join key spelled out so the merge cannot silently pick up additional shared columns
m16 <- left_join(m16, pop16, by = "cod_ibge")
m12 <- left_join(m12, pop12, by = "cod_ibge")
m08 <- left_join(m08, pop08, by = "cod_ibge")

# Share of public employees who are tenured (as opposed to temporary):
# tenured ("estatutarios") employees divided by all employees, per municipality
f08 <- read_csv("data/ibge_numfuncionarios_2008.csv") %>%
  dplyr::mutate(tenured_employees_share = funcionarios_estatutarios_2008 / funcionarios_2008) %>%
  dplyr::select(cod_ibge, tenured_employees_share)
f12 <- read_csv("data/ibge_numfuncionarios_2012.csv") %>%
  dplyr::mutate(tenured_employees_share = funcionarios_estatutarios_2012 / funcionarios_2012) %>%
  dplyr::select(cod_ibge, tenured_employees_share)
# The 2016 cycle uses the 2015 employee file
f16 <- read_csv("data/ibge_numfuncionarios_2015.csv") %>%
  dplyr::mutate(tenured_employees_share = funcionarios_estatutarios_2015 / funcionarios_2015) %>%
  dplyr::select(cod_ibge, tenured_employees_share)
m16 <- left_join(m16, f16, by=c("ibge" = "cod_ibge")) # File f16 uses the 7-digit IBGE codes
# Join key spelled out so the merge cannot silently pick up additional shared columns
m12 <- left_join(m12, f12, by = "cod_ibge")
m08 <- left_join(m08, f08, by = "cod_ibge")

# Percent of the population who are poor (with household per capita income below R$140)
# The same table is merged into all three cycles.
hd <- read_csv("data/human_development_atlas.csv") %>% # from the Human Development Atlas, http://atlasbrasil.org.br/2013/pt/download/
  dplyr::select(cod_ibge = ibge, share_poor = pc_pobres)
# Join key spelled out so the merge cannot silently pick up additional shared columns
m16 <- left_join(m16, hd, by = "cod_ibge")
m12 <- left_join(m12, hd, by = "cod_ibge")
m08 <- left_join(m08, hd, by = "cod_ibge")

# Local media stations (observed only in 2012)
# local_media_2012 is the sum of the four media columns (AM radio, FM radio, community radio,
# TV broadcaster); the same 2012 table is merged into all three cycles.
media <- read_csv("data/local_media_2012.csv") %>% # From IBGE's "Perfil dos municípios 2012" variables A203 - A206, downloadable from https://www.ibge.gov.br/estatisticas/sociais/saude/10586-pesquisa-de-informacoes-basicas-municipais.html?=&t=downloads
  dplyr::mutate(local_media_2012 = radio_am_2012 + radio_fm_2012 + radio_comun_2012 + geradora_tv_2012) %>%
  dplyr::select(cod_ibge = cod_mun, local_media_2012)
# Join key spelled out so the merge cannot silently pick up additional shared columns
m16 <- left_join(m16, media, by = "cod_ibge")
m12 <- left_join(m12, media, by = "cod_ibge")
m08 <- left_join(m08, media, by = "cod_ibge")
                
# MERGE DATA ON ENROLMENTS IN MUNICIPAL SCHOOLS ---------------------------

## Note: This data is obtained by processing large, administrative datasets on enrolments in all Brazilian schools included in Brazil's school census ("Censo Escolar"), which can be downloaded at http://portal.inep.gov.br/web/guest/microdados
## The original files are multiple gigabytes of data, so here we simply import the processed datasets for 2008, 2012, and 2016. 
## Below, commented out, is the code you could use to build these datasets from scratch, starting from the "Censo escolar" data files downloadable from http://portal.inep.gov.br/web/guest/microdados. You would need to download the files for 2008, 2012, and 2016, and unzip them, and rename the three folders "Censo escolar 2016", "Censo escolar 2012", and "Censo escolar 2008". 

# Municipal enrolment counts: from each year's processed file keep the municipality code and
# the enrolment_mun_ef1_<year> column, renamed to the common name enrolment_mun_ef1
enrolments_2008 <- dplyr::select(read_csv("data/matriculas_2008.csv"),
                                 cod_ibge,
                                 enrolment_mun_ef1 = enrolment_mun_ef1_2008)
enrolments_2012 <- dplyr::select(read_csv("data/matriculas_2012.csv"),
                                 cod_ibge,
                                 enrolment_mun_ef1 = enrolment_mun_ef1_2012)
enrolments_2016 <- dplyr::select(read_csv("data/matriculas_2016.csv"),
                                 cod_ibge,
                                 enrolment_mun_ef1 = enrolment_mun_ef1_2016)
# Attach each year's counts to the matching election-cycle table
m16 <- m16 %>% left_join(enrolments_2016, by = "cod_ibge")
m12 <- m12 %>% left_join(enrolments_2012, by = "cod_ibge")
m08 <- m08 %>% left_join(enrolments_2008, by = "cod_ibge")

## Code used to build "matriculas_2008.csv", "matriculas_2012.csv" and "matriculas_2016.csv"
## The data on enrolments ("matrículas") is split by region, so we process one region at a time and then merge them
# # 2016
# matricula_co <- read_delim("Censo escolar 2016/DADOS/MATRICULA_CO.csv", delim="|")
# dim(matricula_co)
# ef <- c(4:21, 41)
# matricula_co_sum <- matricula_co %>%
#   mutate(privada = ifelse(matricula_co$TP_DEPENDENCIA==4,1,0),
#          privada_ef1 = ifelse(matricula_co$TP_DEPENDENCIA==4 & matricula_co$TP_ETAPA_ENSINO %in% ef,1,0),
#          municipal_ef1 = ifelse(matricula_co$TP_DEPENDENCIA==3 & matricula_co$TP_ETAPA_ENSINO %in% ef,1,0),
#          total_ef1 = ifelse(matricula_co$TP_ETAPA_ENSINO %in% ef,1,0),
#          co_ibge = as.integer(str_sub(matricula_co$CO_MUNICIPIO,1,6))) %>%
#   group_by(co_ibge) %>%
#   summarise(privadas_ef1 = sum(privada_ef1), privadas = sum(privada), municipal_ef1 = sum(municipal_ef1), total_ef1 = sum(total_ef1))
# 
# matricula_ne <- read_delim("Censo escolar 2016/DADOS/MATRICULA_NORDESTE.csv", delim="|")
# dim(matricula_ne)
# matricula_ne_sum <- matricula_ne %>%
#   mutate(privada = ifelse(matricula_ne$TP_DEPENDENCIA==4,1,0),
#          privada_ef1 = ifelse(matricula_ne$TP_DEPENDENCIA==4 & matricula_ne$TP_ETAPA_ENSINO %in% ef,1,0),
#          municipal_ef1 = ifelse(matricula_ne$TP_DEPENDENCIA==3 & matricula_ne$TP_ETAPA_ENSINO %in% ef,1,0),
#          total_ef1 = ifelse(matricula_ne$TP_ETAPA_ENSINO %in% ef,1,0),
#          co_ibge = as.integer(str_sub(matricula_ne$CO_MUNICIPIO,1,6))) %>%
#   group_by(co_ibge) %>%
#   summarise(privadas_ef1 = sum(privada_ef1), privadas = sum(privada), municipal_ef1 = sum(municipal_ef1), total_ef1 = sum(total_ef1))
# 
# matricula_no <- read_delim("Censo escolar 2016/DADOS/MATRICULA_NORTE.csv", delim="|")
# dim(matricula_no)
# matricula_no_sum <- matricula_no %>%
#   mutate(privada = ifelse(matricula_no$TP_DEPENDENCIA==4,1,0),
#          privada_ef1 = ifelse(matricula_no$TP_DEPENDENCIA==4 & matricula_no$TP_ETAPA_ENSINO %in% ef,1,0),
#          municipal_ef1 = ifelse(matricula_no$TP_DEPENDENCIA==3 & matricula_no$TP_ETAPA_ENSINO %in% ef,1,0),
#          total_ef1 = ifelse(matricula_no$TP_ETAPA_ENSINO %in% ef,1,0),
#          co_ibge = as.integer(str_sub(matricula_no$CO_MUNICIPIO,1,6))) %>%
#   group_by(co_ibge) %>%
#   summarise(privadas_ef1 = sum(privada_ef1), privadas = sum(privada), municipal_ef1 = sum(municipal_ef1), total_ef1 = sum(total_ef1))
# 
# matricula_se <- read_delim("Censo escolar 2016/DADOS/MATRICULA_SUDESTE.csv", delim="|")
# dim(matricula_se)
# matricula_se_sum <- matricula_se %>%
#   mutate(privada = ifelse(matricula_se$TP_DEPENDENCIA==4,1,0),
#          privada_ef1 = ifelse(matricula_se$TP_DEPENDENCIA==4 & matricula_se$TP_ETAPA_ENSINO %in% ef,1,0),
#          municipal_ef1 = ifelse(matricula_se$TP_DEPENDENCIA==3 & matricula_se$TP_ETAPA_ENSINO %in% ef,1,0),
#          total_ef1 = ifelse(matricula_se$TP_ETAPA_ENSINO %in% ef,1,0),
#          co_ibge = as.integer(str_sub(matricula_se$CO_MUNICIPIO,1,6))) %>%
#   group_by(co_ibge) %>%
#   summarise(privadas_ef1 = sum(privada_ef1), privadas = sum(privada), municipal_ef1 = sum(municipal_ef1), total_ef1 = sum(total_ef1))
# 
# matricula_sul <- read_delim("Censo escolar 2016/DADOS/MATRICULA_SUL.csv", delim="|")
# dim(matricula_sul)
# matricula_sul_sum <- matricula_sul %>%
#   mutate(privada = ifelse(matricula_sul$TP_DEPENDENCIA==4,1,0),
#          privada_ef1 = ifelse(matricula_sul$TP_DEPENDENCIA==4 & matricula_sul$TP_ETAPA_ENSINO %in% ef,1,0),
#          municipal_ef1 = ifelse(matricula_sul$TP_DEPENDENCIA==3 & matricula_sul$TP_ETAPA_ENSINO %in% ef,1,0),
#          total_ef1 = ifelse(matricula_sul$TP_ETAPA_ENSINO %in% ef,1,0),
#          co_ibge = as.integer(str_sub(matricula_sul$CO_MUNICIPIO,1,6))) %>%
#   group_by(co_ibge) %>%
#   summarise(privadas_ef1 = sum(privada_ef1), privadas = sum(privada), municipal_ef1 = sum(municipal_ef1), total_ef1 = sum(total_ef1))
# 
# matriculas_2016 <- rbind(matricula_co_sum, matricula_ne_sum, matricula_no_sum, matricula_se_sum, matricula_sul_sum)
# matriculas_2016$ratio_pvt_mun <- matriculas_2016$privadas_ef1/matriculas_2016$municipal_ef1
# matriculas_2016$ratio_pvt_total <- matriculas_2016$privadas_ef1/matriculas_2016$total_ef1
# matriculas_2016$share_enrolment_mun_ef1 <- matriculas_2016$municipal_ef1/matriculas_2016$total_ef1
# plot(log(matriculas_2016$total_ef1),log(matriculas_2016$ratio_pvt_total)) # Larger cities tend to have larger share of private
# colnames(matriculas_2016) <- c("cod_ibge", "enrolment_pvt_ef1_2016", "enrolment_pvt_2016", "enrolment_mun_ef1_2016",
#                                "enrolment_total_ef1_2016", "ratio_enrolment_pvt_mun_2016", "share_enrolment_pvt_2016",
#                                "share_enrolment_mun_2016")
# 
# write_csv(matriculas_2016, "data/matriculas_2016.csv")
# 
# # 2012
# ef <- c(4:21,41)
# matricula_co <- read_delim("Censo escolar 2012/DADOS/MATRICULA_CO.csv", delim="|")
# matricula_co_sum <- matricula_co %>%
#   mutate(privada = ifelse(matricula_co$ID_DEPENDENCIA_ADM_ESC==4,1,0),
#          privada_ef1 = ifelse(matricula_co$ID_DEPENDENCIA_ADM_ESC==4 & matricula_co$FK_COD_ETAPA_ENSINO %in% ef,1,0),
#          municipal_ef1 = ifelse(matricula_co$ID_DEPENDENCIA_ADM_ESC==3 & matricula_co$FK_COD_ETAPA_ENSINO %in% ef,1,0),
#          total_ef1 = ifelse(matricula_co$FK_COD_ETAPA_ENSINO %in% ef,1,0),
#          co_ibge = as.integer(str_sub(matricula_co$COD_MUNICIPIO_ESCOLA,1,6))) %>%
#   group_by(co_ibge) %>%
#   summarise(privadas_ef1 = sum(privada_ef1), privadas = sum(privada), municipal_ef1 = sum(municipal_ef1), total_ef1 = sum(total_ef1))
# 
# matricula_ne <- read_delim("Censo escolar 2012/DADOS/MATRICULA_NORDESTE.csv", delim="|")
# matricula_ne_sum <- matricula_ne %>%
#   mutate(privada = ifelse(matricula_ne$ID_DEPENDENCIA_ADM_ESC==4,1,0),
#          privada_ef1 = ifelse(matricula_ne$ID_DEPENDENCIA_ADM_ESC==4 & matricula_ne$FK_COD_ETAPA_ENSINO %in% ef,1,0),
#          municipal_ef1 = ifelse(matricula_ne$ID_DEPENDENCIA_ADM_ESC==3 & matricula_ne$FK_COD_ETAPA_ENSINO %in% ef,1,0),
#          total_ef1 = ifelse(matricula_ne$FK_COD_ETAPA_ENSINO %in% ef,1,0),
#          co_ibge = as.integer(str_sub(matricula_ne$COD_MUNICIPIO_ESCOLA,1,6))) %>%
#   group_by(co_ibge) %>%
#   summarise(privadas_ef1 = sum(privada_ef1), privadas = sum(privada), municipal_ef1 = sum(municipal_ef1), total_ef1 = sum(total_ef1))
# 
# matricula_no <- read_delim("Censo escolar 2012/DADOS/MATRICULA_NORTE.csv", delim="|")
# dim(matricula_no)
# matricula_no_sum <- matricula_no %>%
#   mutate(privada = ifelse(matricula_no$ID_DEPENDENCIA_ADM_ESC==4,1,0),
#          privada_ef1 = ifelse(matricula_no$ID_DEPENDENCIA_ADM_ESC==4 & matricula_no$FK_COD_ETAPA_ENSINO %in% ef,1,0),
#          municipal_ef1 = ifelse(matricula_no$ID_DEPENDENCIA_ADM_ESC==3 & matricula_no$FK_COD_ETAPA_ENSINO %in% ef,1,0),
#          total_ef1 = ifelse(matricula_no$FK_COD_ETAPA_ENSINO %in% ef,1,0),
#          co_ibge = as.integer(str_sub(matricula_no$COD_MUNICIPIO_ESCOLA,1,6))) %>%
#   group_by(co_ibge) %>%
#   summarise(privadas_ef1 = sum(privada_ef1), privadas = sum(privada), municipal_ef1 = sum(municipal_ef1), total_ef1 = sum(total_ef1))
# 
# matricula_se <- read_delim("Censo escolar 2012/DADOS/MATRICULA_SUDESTE.csv", delim="|")
# dim(matricula_se)
# matricula_se_sum <- matricula_se %>%
#   mutate(privada = ifelse(matricula_se$ID_DEPENDENCIA_ADM_ESC==4,1,0),
#          privada_ef1 = ifelse(matricula_se$ID_DEPENDENCIA_ADM_ESC==4 & matricula_se$FK_COD_ETAPA_ENSINO %in% ef,1,0),
#          municipal_ef1 = ifelse(matricula_se$ID_DEPENDENCIA_ADM_ESC==3 & matricula_se$FK_COD_ETAPA_ENSINO %in% ef,1,0),
#          total_ef1 = ifelse(matricula_se$FK_COD_ETAPA_ENSINO %in% ef,1,0),
#          co_ibge = as.integer(str_sub(matricula_se$COD_MUNICIPIO_ESCOLA,1,6))) %>%
#   group_by(co_ibge) %>%
#   summarise(privadas_ef1 = sum(privada_ef1), privadas = sum(privada), municipal_ef1 = sum(municipal_ef1), total_ef1 = sum(total_ef1))
# 
# matricula_sul <- read_delim("Censo escolar 2012/DADOS/MATRICULA_SUL.csv", delim="|")
# dim(matricula_sul)
# matricula_sul_sum <- matricula_sul %>%
#   mutate(privada = ifelse(matricula_sul$ID_DEPENDENCIA_ADM_ESC==4,1,0),
#          privada_ef1 = ifelse(matricula_sul$ID_DEPENDENCIA_ADM_ESC==4 & matricula_sul$FK_COD_ETAPA_ENSINO %in% ef,1,0),
#          municipal_ef1 = ifelse(matricula_sul$ID_DEPENDENCIA_ADM_ESC==3 & matricula_sul$FK_COD_ETAPA_ENSINO %in% ef,1,0),
#          total_ef1 = ifelse(matricula_sul$FK_COD_ETAPA_ENSINO %in% ef,1,0),
#          co_ibge = as.integer(str_sub(matricula_sul$COD_MUNICIPIO_ESCOLA,1,6))) %>%
#   group_by(co_ibge) %>%
#   summarise(privadas_ef1 = sum(privada_ef1), privadas = sum(privada), municipal_ef1 = sum(municipal_ef1), total_ef1 = sum(total_ef1))
# 
# matriculas_2012 <- rbind(matricula_co_sum, matricula_ne_sum, matricula_no_sum, matricula_se_sum, matricula_sul_sum)
# matriculas_2012$ratio_pvt_mun <- matriculas_2012$privadas_ef1/matriculas_2012$municipal_ef1
# matriculas_2012$ratio_pvt_total <- matriculas_2012$privadas_ef1/matriculas_2012$total_ef1
# matriculas_2012$share_enrolment_mun_ef1 <- matriculas_2012$municipal_ef1/matriculas_2012$total_ef1
# colnames(matriculas_2012) <- c("cod_ibge", "enrolment_pvt_ef1_2012", "enrolment_pvt_2012", "enrolment_mun_ef1_2012",
#                                "enrolment_total_ef1_2012", "ratio_enrolment_pvt_mun_2012", "share_enrolment_pvt_2012",
#                                "share_enrolment_mun_2012")
# 
# write_csv(matriculas_2012, "data/matriculas_2012.csv")
# 
# # 2008
# ef <- c(4:21,41)
# matricula_co <- read_delim("Censo escolar 2008/DADOS/MATRICULA_CO.csv", delim="|")
# matricula_co_sum <- matricula_co %>%
#   mutate(privada = ifelse(matricula_co$ID_DEPENDENCIA_ADM_ESC==4,1,0),
#          privada_ef1 = ifelse(matricula_co$ID_DEPENDENCIA_ADM_ESC==4 & matricula_co$FK_COD_ETAPA_ENSINO %in% ef,1,0),
#          municipal_ef1 = ifelse(matricula_co$ID_DEPENDENCIA_ADM_ESC==3 & matricula_co$FK_COD_ETAPA_ENSINO %in% ef,1,0),
#          total_ef1 = ifelse(matricula_co$FK_COD_ETAPA_ENSINO %in% ef,1,0),
#          co_ibge = as.integer(str_sub(matricula_co$COD_MUNICIPIO_ESCOLA,1,6))) %>%
#   group_by(co_ibge) %>%
#   summarise(privadas_ef1 = sum(privada_ef1), privadas = sum(privada), municipal_ef1 = sum(municipal_ef1), total_ef1 = sum(total_ef1))
# 
# matricula_ne <- read_delim("Censo escolar 2008/DADOS/MATRICULA_NORDESTE.csv", delim="|")
# matricula_ne_sum <- matricula_ne %>%
#   mutate(privada = ifelse(matricula_ne$ID_DEPENDENCIA_ADM_ESC==4,1,0),
#          privada_ef1 = ifelse(matricula_ne$ID_DEPENDENCIA_ADM_ESC==4 & matricula_ne$FK_COD_ETAPA_ENSINO %in% ef,1,0),
#          municipal_ef1 = ifelse(matricula_ne$ID_DEPENDENCIA_ADM_ESC==3 & matricula_ne$FK_COD_ETAPA_ENSINO %in% ef,1,0),
#          total_ef1 = ifelse(matricula_ne$FK_COD_ETAPA_ENSINO %in% ef,1,0),
#          co_ibge = as.integer(str_sub(matricula_ne$COD_MUNICIPIO_ESCOLA,1,6))) %>%
#   group_by(co_ibge) %>%
#   summarise(privadas_ef1 = sum(privada_ef1), privadas = sum(privada), municipal_ef1 = sum(municipal_ef1), total_ef1 = sum(total_ef1))
# 
# matricula_no <- read_delim("Censo escolar 2008/DADOS/MATRICULA_NORTE.csv", delim="|")
# matricula_no_sum <- matricula_no %>%
#   mutate(privada = ifelse(matricula_no$ID_DEPENDENCIA_ADM_ESC==4,1,0),
#          privada_ef1 = ifelse(matricula_no$ID_DEPENDENCIA_ADM_ESC==4 & matricula_no$FK_COD_ETAPA_ENSINO %in% ef,1,0),
#          municipal_ef1 = ifelse(matricula_no$ID_DEPENDENCIA_ADM_ESC==3 & matricula_no$FK_COD_ETAPA_ENSINO %in% ef,1,0),
#          total_ef1 = ifelse(matricula_no$FK_COD_ETAPA_ENSINO %in% ef,1,0),
#          co_ibge = as.integer(str_sub(matricula_no$COD_MUNICIPIO_ESCOLA,1,6))) %>%
#   group_by(co_ibge) %>%
#   summarise(privadas_ef1 = sum(privada_ef1), privadas = sum(privada), municipal_ef1 = sum(municipal_ef1), total_ef1 = sum(total_ef1))
# 
# matricula_se <- read_delim("Censo escolar 2008/DADOS/MATRICULA_SUDESTE.csv", delim="|")
# matricula_se_sum <- matricula_se %>%
#   mutate(privada = ifelse(matricula_se$ID_DEPENDENCIA_ADM_ESC==4,1,0),
#          privada_ef1 = ifelse(matricula_se$ID_DEPENDENCIA_ADM_ESC==4 & matricula_se$FK_COD_ETAPA_ENSINO %in% ef,1,0),
#          municipal_ef1 = ifelse(matricula_se$ID_DEPENDENCIA_ADM_ESC==3 & matricula_se$FK_COD_ETAPA_ENSINO %in% ef,1,0),
#          total_ef1 = ifelse(matricula_se$FK_COD_ETAPA_ENSINO %in% ef,1,0),
#          co_ibge = as.integer(str_sub(matricula_se$COD_MUNICIPIO_ESCOLA,1,6))) %>%
#   group_by(co_ibge) %>%
#   summarise(privadas_ef1 = sum(privada_ef1), privadas = sum(privada), municipal_ef1 = sum(municipal_ef1), total_ef1 = sum(total_ef1))
# 
# matricula_sul <- read_delim("Censo escolar 2008/DADOS/MATRICULA_SUL.csv", delim="|")
# matricula_sul_sum <- matricula_sul %>%
#   mutate(privada = ifelse(matricula_sul$ID_DEPENDENCIA_ADM_ESC==4,1,0),
#          privada_ef1 = ifelse(matricula_sul$ID_DEPENDENCIA_ADM_ESC==4 & matricula_sul$FK_COD_ETAPA_ENSINO %in% ef,1,0),
#          municipal_ef1 = ifelse(matricula_sul$ID_DEPENDENCIA_ADM_ESC==3 & matricula_sul$FK_COD_ETAPA_ENSINO %in% ef,1,0),
#          total_ef1 = ifelse(matricula_sul$FK_COD_ETAPA_ENSINO %in% ef,1,0),
#          co_ibge = as.integer(str_sub(matricula_sul$COD_MUNICIPIO_ESCOLA,1,6))) %>%
#   group_by(co_ibge) %>%
#   summarise(privadas_ef1 = sum(privada_ef1), privadas = sum(privada), municipal_ef1 = sum(municipal_ef1), total_ef1 = sum(total_ef1))
# 
# matriculas_2008 <- rbind(matricula_co_sum, matricula_ne_sum, matricula_no_sum, matricula_se_sum, matricula_sul_sum)
# matriculas_2008$ratio_pvt_mun <- matriculas_2008$privadas_ef1/matriculas_2008$municipal_ef1
# matriculas_2008$ratio_pvt_total <- matriculas_2008$privadas_ef1/matriculas_2008$total_ef1
# matriculas_2008$share_enrolment_mun_ef1 <- matriculas_2008$municipal_ef1/matriculas_2008$total_ef1
# colnames(matriculas_2008) <- c("cod_ibge", "enrolment_pvt_ef1_2008", "enrolment_pvt_2008", "enrolment_mun_ef1_2008",
#                                "enrolment_total_ef1_2008", "ratio_enrolment_pvt_mun_2008", "share_enrolment_pvt_2008",
#                                "share_enrolment_mun_2008")
# write_csv(matriculas_2008, "data/matriculas_2008.csv")

# ASSEMBLE LONG DATASET, GENERATE COVARIATES --------------------------------------------------

m <- rbind(m16, m12, m08) %>% # Bind observations for all 3 cycles together
  arrange(cod_ibge, electoral_cycle) # Order observations by municipality, election

# Treated indicator if IDEB target was met (1 when ideb_gap_centered >= 0, 0 otherwise, NA when the gap is missing)
m$treated <- ifelse(m$ideb_gap_centered>=0,1,0)
# Municipal school enrolments as a share of the population
m$share_pop_enroled_mun <- m$enrolment_mun_ef1/m$population
# Upper-quartile cutoff of that share, computed over the pooled (all-cycle) data, ignoring missing values
# (TRUE is spelled out because T is an ordinary variable that can be reassigned)
share_pop_enroled_mun_p75 <- quantile(m$share_pop_enroled_mun, .75, na.rm = TRUE)[[1]]
m$share_pop_enroled_mun_high <- ifelse(m$share_pop_enroled_mun>share_pop_enroled_mun_p75,1,0)
# Interactions of the high-enrolment indicator with treatment and with the centered gap
m$treated_share_pop_enroled_mun_high <- m$treated*m$share_pop_enroled_mun_high
m$treated_forcing_share_pop_enroled_mun_high <- m$treated_share_pop_enroled_mun_high*m$ideb_gap_centered
m$forcing_share_pop_enroled_mun_high <- m$ideb_gap_centered*m$share_pop_enroled_mun_high

# EXPORT DATASET ----------------------------------------------------------

write_csv(m, "data/rdd_dataset.csv")

# GENERATE CODEBOOK -------------------------------------------------------

var_label(m) <- list(
  ibge = "7-digit IBGE municipality identifier",
  nom_mun = "Municipality name with state identifier",
  cod_ibge = "6-digit IBGE municipality identifier",
  sigla_UF = "2-letter state identifier",
  cod_tse = "TSE municipality identifier",
  nome_Municipio = "Municipality name with no special characters",
  code_municipality_tse = "5-digit TSE municipality identifier",
  code_candidate = "TSE candidate code for the elected candidate",
  cpf_candidate = "CPF (unique personal identifier issued by the Receita Federal) for the elected candidate",
  code_party = "2-digit TSE code for the party of the candidate elected as mayor",
  name_party = "TSE acronym for the party of the candidate elected as mayor",
  round = "Electoral round in which the election was decided",
  candidate_situation = "Situation of the candidacy before the TSE",
  candidate_result = "Electoral result for the candidate who got elected as mayor",
  election_description = "Description of the election where the mayor got elected",
  supplementary_election = "Indicator for whether the election where the mayor got elected was supplementary",
  incumbent_mayor_reelected = "Indicator for whether the incumbent mayor was reelected",
  incumbent_mayor_cannot_run = "Indicator for whether the incumbent mayor was elected also on the previous term, and therefore is not legally allowed to run",
  incumbent_mayor_ran = "Indicator for whether the incumbent mayor runs",
  incumbent_party_ran = "Indicator for whether the incumbent party runs",
  total_votes = "Total number of votes cast in the election",
  electoral_concentration = "Herfindahl index of electoral concentration in the first round of the election",
  incumbent_mayor_voteshare = "Vote share of the incumbent mayor",
  winner_mayor_voteshare = "Vote share of the candidate who is elected as mayor",
  incumbent_mayor_voteshare_round2 = "Vote share of the incumbent mayor in the second round, when one is held",
  winner_mayor_voteshare_round2 = "Vote share of the candidate who is elected as mayor in the second round, when one is held",
  electoral_cycle = "Electoral cycle",
  electoral_cycle_2016 = "Indicator for the electoral cycle around the 2016 elections",
  electoral_cycle_2012 = "Indicator for the electoral cycle around the 2012 elections",
  incumbent_party_pt = "Indicator for whether the incumbent mayor belongs to PT",
  incumbent_party_psdb = "Indicator for whether the incumbent mayor belongs to PSDB",
  incumbent_party_pmdb = "Indicator for whether the incumbent mayor belongs to PMDB",
  incumbent_mayor_voteshare_previous = "Vote share of the incumbent mayor in the previous election",
  ideb = "Municipal IDEB score for municipal, primary schools (with rounding)",
  ideb_cont = "Municipal IDEB score for municipal, primary schools (without rounding)",
  ideb_target = "Municipal IDEB target for municipal, primary schools",
  ideb_gap = "Difference between the continuous IDEB score and the IDEB target, i.e., ideb_cont - ideb_target",
  ideb_gap_centered = "Difference between the continuous IDEB score and the IDEB target centered around zero, i.e., ideb_cont - ideb_target + 0.05",
  ideb_ef2 = "Municipal IDEB score for municipal, middle schools (with rounding)",
  ideb_cont_ef2 = "Municipal IDEB score for municipal, middle schools (without rounding)",
  ideb_target_ef2 = "Municipal IDEB target for municipal, middle schools",
  ideb_gap_ef2 = "Difference between the continuous IDEB score and the IDEB target for middle schools",
  ef1_only = "Indicator for whether the municipality lacks data for ideb_gap_ef2",
  population = "Population of the municipality",
  population_log = "Logged population of the municipality",
  tenured_employees_share = "Proportion of municipal employees who are tenured",
  share_poor = "Proportion of the municipality living below the poverty line (R$140 in 2010)",
  local_media_2012 = "Number of types of local media outlets existing in the municipality",
  enrolment_mun_ef1 = "Number of children enrolled in municipal, primary schools",
  treated = "Indicator for whether the municipality met its IDEB target, i.e. whether ideb_gap_centered >= 0",
  share_pop_enroled_mun = "Number of children enrolled in municipal schools, relative to total population",
  share_pop_enroled_mun_high = "Indicator for whether the number of children enrolled in municipal schools, relative to total population, is in the upper quartile",
  treated_share_pop_enroled_mun_high = "Indicator for whether the municipality met its IDEB target and whether the number of children enrolled in municipal schools, relative to total population, is in the upper quartile",
  treated_forcing_share_pop_enroled_mun_high = "Difference between the continuous IDEB score and the IDEB target, multiplied by an indicator for whether the municipality met its IDEB target and whether the number of children enrolled in municipal schools, relative to total population, is in the upper quartile",
  forcing_share_pop_enroled_mun_high = "Continuous IDEB score multiplied by indicator for whether the number of children enrolled in municipal schools, relative to total population, is in the upper quartile"
)

# Attach dataset-level metadata to `m` before generating the codebook.
# `name` is set to the file name of the dataset this script builds.
metadata(m)$name <- "rdd_dataset.csv"
# Free-text description of the dataset, stored alongside the name.
metadata(m)$description <- "Codebook for the dataset used for all RDD analyses in Competence versus Priorities: Negative Electoral Responses to Education Quality in Brazil"

# Generate the codebook for `m`; called at top level, so the result is auto-printed.
# NOTE(review): codebook() is presumably meant to be rendered from an R Markdown
# document -- confirm how this call is expected to be run.
codebook(m)

# `:::` fetches label_browser_static directly from the codebook namespace,
# whether or not the function is exported.
# NOTE(review): presumably displays a browsable table of the variable labels
# attached to `m`; confirm against the codebook package documentation.
codebook:::label_browser_static(m)

# NOTES -- R version, platform, and loaded packages -------------------------
# Output of sessionInfo(package = NULL):
# R version 3.6.3 (2020-02-29)
# Platform: x86_64-apple-darwin15.6.0 (64-bit)
# Running under: macOS Catalina 10.15.3
# 
# Matrix products: default
# BLAS:   /System/Library/Frameworks/Accelerate.framework/Versions/A/Frameworks/vecLib.framework/Versions/A/libBLAS.dylib
# LAPACK: /Library/Frameworks/R.framework/Versions/3.6/Resources/lib/libRlapack.dylib
# 
# Random number generation:
#   RNG:     Mersenne-Twister
#   Normal:  Inversion
#   Sample:  Rounding
# 
# locale:
#   [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
# 
# attached base packages:
#   [1] stats     graphics  grDevices utils     datasets  methods   base     
# 
# other attached packages:
#   [1] Hmisc_4.4-0       lattice_0.20-40   timelineS_0.1.1   lfe_2.8-5         Matrix_1.2-18    
# [6] xtable_1.8-4      texreg_1.37.1     rdrobust_0.99.7   rdd_0.57          Formula_1.2-3    
# [11] AER_1.2-9         survival_3.1-11   car_3.0-7         carData_3.0-3     lmtest_0.9-37    
# [16] zoo_1.8-7         sandwich_2.5-1    shiny_1.4.0.2     readxl_1.3.1      codebook_0.9.2   
# [21] electionsBR_0.3.1 forcats_0.5.0     stringr_1.4.0     dplyr_1.0.0       purrr_0.3.3      
# [26] readr_1.3.1       tidyr_1.0.2       tibble_3.0.0      ggplot2_3.3.0     tidyverse_1.3.0  
# 
# loaded via a namespace (and not attached):
#   [1] colorspace_1.4-1    ellipsis_0.3.0      rio_0.5.16          htmlTable_1.13.3   
# [5] base64enc_0.1-3     fs_1.4.1            rstudioapi_0.11     listenv_0.8.0      
# [9] farver_2.0.3        DT_0.13             fansi_0.4.1         lubridate_1.7.4    
# [13] xml2_1.3.0          codetools_0.2-16    splines_3.6.3       knitr_1.28         
# [17] jsonlite_1.6.1      broom_0.5.5         cluster_2.1.0       dbplyr_1.4.2       
# [21] png_0.1-7           compiler_3.6.3      httr_1.4.1          backports_1.1.5    
# [25] assertthat_0.2.1    fastmap_1.0.1       cli_2.0.2           later_1.0.0        
# [29] acepack_1.4.1       htmltools_0.4.0     tools_3.6.3         gtable_0.3.0       
# [33] glue_1.3.2          Rcpp_1.0.4          cellranger_1.1.0    vctrs_0.3.1        
# [37] nlme_3.1-145        crosstalk_1.1.0.1   xfun_0.12           globals_0.12.5     
# [41] openxlsx_4.1.4      rvest_0.3.5         mime_0.9            miniUI_0.1.1.1     
# [45] lifecycle_0.2.0     future_1.17.0       scales_1.1.0        hms_0.5.3          
# [49] promises_1.1.0      parallel_3.6.3      RColorBrewer_1.1-2  yaml_2.2.1         
# [53] curl_4.3            gridExtra_2.3       rpart_4.1-15        labelled_2.5.0     
# [57] latticeExtra_0.6-29 stringi_1.4.6       highr_0.8           checkmate_2.0.0    
# [61] rmdpartials_0.5.8   zip_2.0.4           repr_1.1.0          rlang_0.4.6        
# [65] pkgconfig_2.0.3     evaluate_0.14       htmlwidgets_1.5.1   labeling_0.3       
# [69] tidyselect_1.1.0    magrittr_1.5        R6_2.4.1            generics_0.0.2     
# [73] DBI_1.1.0           pillar_1.4.3        haven_2.3.1         foreign_0.8-76     
# [77] withr_2.1.2         nnet_7.3-13         abind_1.4-5         modelr_0.1.6       
# [81] crayon_1.3.4        utf8_1.1.4          rmarkdown_2.1       jpeg_0.1-8.1       
# [85] grid_3.6.3          data.table_1.12.8   reprex_0.3.0        digest_0.6.25      
# [89] httpuv_1.5.2        munsell_0.5.0       skimr_2.1.1      